* Set the working directory to the replication folder; the data\ and latex_text\ paths used below are relative to it.
* NOTE(review): machine-specific absolute path - adjust before running on another computer.
cd "D:\Dropbox\book_welfare\replication"


****************************************************************
* Overall supply in Bookstat: unknown gender, unknown + male, and all books
****************************************************************


* Yearly Bookstat counts of new books, split by author-gender status, plus exploratory plots.
use data\bookstat_gender_pyear_genre_asin.dta, clear 
	* One row per publication year.
	* males is summed from N_male (the same mapping the Bookstat female-share section
	* further down uses). Summing N_name into both males and name_present would make
	* nfemale identically zero.
	collapse (sum) males = N_male name_present=N_name total, by(pyear)
	
	* Counts by gender status: unknown = no gender assigned from the name.
	gen nunk = total  - name_present 
	gen nmale = males 
	gen nfemale = name_present-nmale 
	
	* Cumulative series for the stacked-looking line plot: unknown, unknown+male, everything.
	gen n1 = nunk 
	gen n2 = nunk+nmale 
	gen n3 = total 
	
	* Total new books per publication year.
	twoway (line n3 pyear) if pyear>=1960 & pyear<=2020, ///
		scheme(lean2) ytitle(new books) xtitle(publication year) t1(Bookstat)
	
	* Male- vs female-authored new books.
	twoway (line nmale pyear) (line nfemale pyear) if pyear>=1960 & pyear<=2020, ///
		scheme(lean2) ytitle(new books) xtitle(publication year) t1(Bookstat) ///
		legend(order(1 "male author" 2 "female author"))
	
	* Cumulative lines: unknown, + male, + female (= total).
	twoway (line n1 pyear) (line n2 pyear)  (line n3 pyear) if pyear>=1960 & pyear<=2020, ///
		scheme(lean2) legend(order(1 "unknown gender" 2 "+ male" 3 "+female"))
	
	* Gender-identified total; plotted against the female count on a log scale.
	gen nid = nmale + nfemale 
	* Only two plots are drawn here, so the legend keys are 1 and 2 (key 3 does not exist).
	twoway (line nfemale pyear) (line nid pyear)   if pyear>=1920 & pyear<=2020, ///
		scheme(lean2) legend(order(1 "female" 2 "+male")) yscale(log)
	
	twoway (line nfemale pyear) (line nmale pyear)   if pyear>=1920 & pyear<=2020, ///
		scheme(lean2) legend(order(1 "female" 2 "male")) yscale(log) ytitle(log scale)

****************************************************************
* Female share of supply graph comparing Goodreads and Bookstat - overall
****************************************************************



 * GR first: female share relative to ALL books (books with missing fbookp stay in the denominator)
use  "data\books_year.dta", clear  

		* Sample window: year in 2007-2016, publication year in 1960-2016.
		keep if year>=2007 & year<=2016 
		keep if pubyr >=1960 & pubyr<=2016 
		**************************************
		* Recode missing fbookp to its own category (2) so those books survive the collapse
		* and count toward the yearly total.
		replace fbookp=2 if fbookp==. 
		*************************************
		* Step 1: one row per (book_id, fbookp, genre1, pubyr); n = number of non-missing q rows.
		collapse (count) n=q (sum) qf qm q , by(book_id fbookp genre1 pubyr)
		* Step 2: one row per (fbookp, pubyr); n now counts the step-1 rows in each cell.
		collapse (count) n (sum) q qf qm, by( fbookp pubyr)

	rename pubyr pyear 
	* Yearly total across all fbookp categories, then each category's share of it.
	egen Nv = sum(n), by(pyear)
	gen ngrf = n/Nv
	* Keep the fbookp==1 rows. The full name is spelled out: after the collapse fbookp is
	* the only variable matching the old abbreviation "fbook", so results are unchanged,
	* but the command no longer depends on variable abbreviation being enabled.
	keep if fbookp==1 
	
	keep ngrf pyear 

twoway (line ngrf  pyear) if (pyear>=1960 & pyear<=2016), scheme(lean2) ytitle(share written by women) xtitle(publication year)

* Stash the "vs all" series for the merge in the next section.
tempfile xgr 
save `xgr'

 * GR again: female share relative to gender-IDENTIFIED books only
use  "data\books_year.dta", clear  

		* Same sample window as the "vs all" pass.
		keep if year>=2007 & year<=2016 
		keep if pubyr >=1960 & pubyr<=2016 
		**************************************
		* Books with missing fbookp are dropped, so they are excluded from the denominator.
		drop if fbookp==. 
		*************************************
		* NOTE(review): by() uses "fbook" while the filter above uses "fbookp". If the data
		* has no separate fbook variable this resolves to fbookp by abbreviation - confirm.
		collapse (count) n=q (sum) qf qm q , by(book_id fbook genre1 pubyr)
		collapse (count) n (sum) q qf qm, by( fbook pubyr)

	rename pubyr pyear 
	* Yearly total over identified books, then each category's share of it.
	egen Nv1 = sum(n), by(pyear)
	* Reference Nv1 by its full name rather than relying on the abbreviation "Nv".
	gen ngrf1 = n/Nv1
	keep if fbook==1 
	
	keep ngrf1 pyear 

twoway (line ngrf1  pyear) if (pyear>=1960 & pyear<=2016), scheme(lean2) ytitle(share written by women) xtitle(publication year)

* Bring in the "vs all" series saved earlier and compare the two definitions.
merge 1:1 pyear using `xgr'
* merge appends unmatched using-observations at the end; re-sort so the line plot
* connects years in chronological order.
sort pyear
twoway (line ngrf  pyear)  (line ngrf1  pyear) if (pyear>=1960 & pyear<=2016), scheme(lean2) ytitle(share written by women) xtitle(publication year) legend(order(1 "vs all" 2 "vs id'd" ))

drop _merge 
* Both Goodreads series (ngrf, ngrf1) by pyear, for the Bookstat comparison below.
tempfile gr 
save `gr'




******************************
* Now, bookstat
******************************


* Bookstat female shares by publication year, compared with the Goodreads series.
use   data\bookstat_gender_pyear_genre_asin.dta, clear


	collapse (sum) males=N_male name_present=N_name total, by(pyear)
	
	
		* Naming follows the Goodreads / copyright series used in the plots below:
		*   nbsf  = female share among ALL titles        (pairs with ngrf,  regs_all)
		*   nbsf1 = female share among IDENTIFIED titles (pairs with ngrf1, regs_id)
		* The "among all" numerator is identified-minus-male; using 1 - males/total would
		* count unknown-gender titles as female.
		gen nbsf =  (name_present - males)/total 	
		gen nbsf1 =  1 - males/name_present  	

		*gen sf = 1 - qm/qname 
		* drop _merge 
		merge 1:1 pyear using `gr'
		* merge can leave unmatched using-rows at the end; keep years in order for line plots.
		sort pyear

* Among all titles: Bookstat vs Goodreads.
twoway ( line nbsf pyear )  ( line ngrf pyear ) if pyear>=1960 & pyear<=2021, scheme(lean2) ///
xtitle(publication year) ytitle(female shares) legend(order(1 "Bookstat" 2 "Goodreads")) t1()

* Among gender-identified titles: Bookstat vs Goodreads.
twoway ( line nbsf1 pyear )  ( line ngrf1 pyear ) if pyear>=1960 & pyear<=2021, scheme(lean2) ///
xtitle(publication year) ytitle(female shares) legend(order(1 "Bookstat" 2 "Goodreads"))


* Bookstat only: the two denominators side by side.
twoway ( line nbsf pyear )  ( line nbsf1 pyear ) if pyear>=1960 & pyear<=2021, scheme(lean2) ///
xtitle(publication year) ytitle(female shares) legend(order(1 "among all" 2 "among id'd"))

* graph export latex_text\figures\female_share_supply_bookstat_goodreads.pdf, as(pdf) name("Graph") replace 

drop _merge 

* Keep only the four share series; saved for the merge with copyright registrations.
keep nbsf nbsf1 ngrf ngrf1 pyear 
tempfile stuff 
save `stuff'

* Copyright-registration female shares, merged with the Bookstat/Goodreads series and
* drawn as a two-panel figure.
* The registration file is loaded directly: the earlier preserve / save-to-tempfile /
* restore / use round trip ended with exactly this dataset in memory.
use 	data\reg_sumary.dta, clear 

keep if cleantype=="Non-dramatic literary work"

* Female share of registrations: relative to Nmatch and relative to N.
gen regs_id = Nfemale/Nmatch
gen regs_all = Nfemale/N 
rename year pyear 

merge 1:1 pyear using `stuff'

* Line plots connect observations in data order, so sort by year after merging.
sort pyear 

* Panel 1: the "1"/"_id" series, titled "among gender-identified"; legend suppressed.
twoway ( line nbsf1 pyear )  ( line ngrf1 pyear ) ( line regs_id pyear ) if pyear>=1960 & pyear<=2021, scheme(lean2) ///
xtitle(publication year) ytitle(female shares)  t1(among gender-identified)  legend(off)

 graph save "Graph" "data\one.gph", replace

* Panel 2: the unsuffixed/"_all" series, titled "among all"; carries the legend for the figure.
twoway ( line nbsf pyear )  ( line ngrf pyear )  ( line regs_all pyear ) if pyear>=1960 & pyear<=2021, scheme(lean2) ///
xtitle(publication year) ytitle(female shares)  t1(among all)     legend(pos(3) col(1) lab(1 "Bookstat") lab(2 "Goodreads")  lab(3 "copyright regs") stack)
 graph save "Graph" "data\two.gph", replace

 *legend(order(1 "Bookstat" 2 "Goodreads" 3 "copyright regs")) 
 
 * Combine the two saved panels and export the result as a PDF.
 gr combine data\one.gph data\two.gph, scheme(lean2) 
 
graph export latex_text\figures\female_share_supply_bookstat_goodreads_co_asin.pdf, as(pdf) name("Graph") replace 



* Correlation between the Bookstat and Goodreads nbsf / ngrf series across years.
corr nbsf ngrf

